bitkeeper revision 1.1108.34.1 (4107ea3eSSzRurUHPnjryAZ3X8VXbg)
authorgm281@boulderdash.cl.cam.ac.uk <gm281@boulderdash.cl.cam.ac.uk>
Wed, 28 Jul 2004 18:02:38 +0000 (18:02 +0000)
committergm281@boulderdash.cl.cam.ac.uk <gm281@boulderdash.cl.cam.ac.uk>
Wed, 28 Jul 2004 18:02:38 +0000 (18:02 +0000)
New locking scheme for schedulers

xen/common/domain.c
xen/common/sched_bvt.c
xen/common/sched_fair_bvt.c
xen/common/sched_rrobin.c
xen/common/schedule.c
xen/include/xen/sched.h

index 111210c5e7ced04f6031733fae8c976f52f64416..3a1bfc2e7c9665e9a6b366f63046ae7307c8039c 100644 (file)
@@ -40,7 +40,9 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu)
     d->domain    = dom_id;
     d->processor = cpu;
     d->create_time = NOW();
-
+    /* Initialise the state_lock */
+    spin_lock_init(&d->state_lock);
     memcpy(&d->thread, &idle0_task.thread, sizeof(d->thread));
 
     if ( d->domain != IDLE_DOMAIN_ID )
index e4498359e6f1ee701901c3d72bf4d9fd6c47e802..c03f2a78d376eaa9266dcbb68253b66ddc511e0a 100644 (file)
@@ -45,6 +45,7 @@ struct bvt_dom_info
 
 struct bvt_cpu_info
 {
+    spinlock_t          run_lock;   /* protects runqueue */
     struct list_head    runqueue;   /* runqueue for given processor */ 
     unsigned long       svt;        /* XXX check this is unsigned long! */
 };
@@ -148,15 +149,86 @@ int bvt_init_idle_task(struct domain *p)
 
     bvt_add_task(p);
 
-    spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);
+    spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);
+    
     set_bit(DF_RUNNING, &p->flags);
     if ( !__task_on_runqueue(RUNLIST(p)) )
         __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor));
-    spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
+        
+    spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
 
     return 0;
 }
 
+void bvt_wake(struct domain *d)
+{
+    unsigned long       flags;
+    struct bvt_dom_info *inf = BVT_INFO(d);
+    struct domain       *curr;
+    s_time_t            now, min_time;
+    int                 cpu = d->processor;
+
+    /* The runqueue accesses must be protected */
+    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
+    
+    /* If on the runqueue already then someone has done the wakeup work. */
+    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
+    {
+        spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
+        return;
+    }
+
+    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(d->processor));
+
+    now = NOW();
+
+    /* Set the BVT parameters. */
+    if ( inf->avt < CPU_SVT(cpu) )
+        inf->avt = CPU_SVT(cpu);
+
+    /* Deal with warping here. */
+    inf->warpback  = 1;
+    inf->warped    = now;
+    __calc_evt(inf);
+    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
+    
+    /* Access to schedule_data protected by schedule_lock */
+    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+    
+    curr = schedule_data[cpu].curr;
+
+    /* Currently-running domain should run at least for ctx_allow. */
+    min_time = curr->lastschd + curr->min_slice;
+
+    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);   
+   
+    if ( is_idle_task(curr) || (min_time <= now) )
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
+        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
+
+}
+
+
+static void bvt_sleep(struct domain *d)
+{
+    unsigned long flags;
+    
+    if ( test_bit(DF_RUNNING, &d->flags) )
+        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+    else 
+    {
+        /* The runqueue accesses must be protected */
+        spin_lock_irqsave(&CPU_INFO(d->processor)->run_lock, flags);
+        
+        
+        if ( __task_on_runqueue(RUNLIST(d)) )
+            __del_from_runqueue(RUNLIST(d));
+
+        spin_unlock_irqrestore(&CPU_INFO(d->processor)->run_lock, flags);    
+    }
+}
+
 /**
  * bvt_free_task - free BVT private structures for a task
  * @p:             task
@@ -218,7 +290,7 @@ int bvt_adjdom(struct domain *p,
         if ( mcu_adv == 0 )
             return -EINVAL;
         
-        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
+        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
         inf->mcu_advance = mcu_adv;
         inf->warp = warp;
         inf->warpl = warpl;
@@ -229,18 +301,18 @@ int bvt_adjdom(struct domain *p,
                 p->domain, inf->mcu_advance, inf->warp,
                 inf->warpl, inf->warpu );
 
-        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
+        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
     }
     else if ( cmd->direction == SCHED_INFO_GET )
     {
         struct bvt_dom_info *inf = BVT_INFO(p);
 
-        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
+        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
         params->mcu_adv = inf->mcu_advance;
         params->warp    = inf->warp;
         params->warpl   = inf->warpl;
         params->warpu   = inf->warpu;
-        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
+        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
     }
     
     return 0;
@@ -256,6 +328,7 @@ int bvt_adjdom(struct domain *p,
  */
 static task_slice_t bvt_do_schedule(s_time_t now)
 {
+    unsigned long flags;
     struct domain *prev = current, *next = NULL, *next_prime, *p; 
     struct list_head   *tmp;
     int                 cpu = prev->processor;
@@ -269,8 +342,12 @@ static task_slice_t bvt_do_schedule(s_time_t now)
                         *next_prime_inf = NULL;
     task_slice_t        ret;
 
+
     ASSERT(prev->sched_priv != NULL);
     ASSERT(prev_inf != NULL);
+    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
+
+    ASSERT(__task_on_runqueue(RUNLIST(prev)));
 
     if ( likely(!is_idle_task(prev)) ) 
     {
@@ -329,7 +406,9 @@ static task_slice_t bvt_do_schedule(s_time_t now)
         if ( p_inf->avt < min_avt )
             min_avt = p_inf->avt;
     }
-
+    
+    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
     /* Extract the domain pointers from the dom infos */
     next        = next_inf->domain;
     next_prime  = next_prime_inf->domain;
@@ -341,8 +420,10 @@ static task_slice_t bvt_do_schedule(s_time_t now)
     /* check for virtual time overrun on this cpu */
     if ( CPU_SVT(cpu) >= 0xf0000000 )
     {
-        u_long t_flags; 
+        u_long t_flags;
+        
         write_lock_irqsave(&tasklist_lock, t_flags); 
+        
         for_each_domain ( p )
         {
             if ( p->processor == cpu )
@@ -352,7 +433,9 @@ static task_slice_t bvt_do_schedule(s_time_t now)
                 p_inf->avt -= 0xe0000000;
             }
         } 
+        
         write_unlock_irqrestore(&tasklist_lock, t_flags); 
+        
         CPU_SVT(cpu) -= 0xe0000000;
     }
 
@@ -411,7 +494,7 @@ static void bvt_dump_cpu_state(int i)
     struct bvt_dom_info *d_inf;
     struct domain *d;
     
-    spin_lock_irqsave(&schedule_data[i].schedule_lock, flags);
+    spin_lock_irqsave(&CPU_INFO(i)->run_lock, flags);
     printk("svt=0x%08lX ", CPU_SVT(i));
 
     queue = RUNQUEUE(i);
@@ -430,7 +513,7 @@ static void bvt_dump_cpu_state(int i)
             (unsigned long)list, (unsigned long)list->next,
             (unsigned long)list->prev);
     }
-    spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags);        
+    spin_unlock_irqrestore(&CPU_INFO(i)->run_lock, flags);        
 }
 
 /* We use cache to create the bvt_dom_infos 
@@ -452,14 +535,16 @@ int bvt_init_scheduler()
     for ( i = 0; i < NR_CPUS; i++ )
     {
         schedule_data[i].sched_priv = xmalloc(sizeof(struct bvt_cpu_info));
-        INIT_LIST_HEAD(RUNQUEUE(i));
-        
+       
         if ( schedule_data[i].sched_priv == NULL )
         {
             printk("Failed to allocate BVT scheduler per-CPU memory!\n");
             return -1;
         }
 
+        INIT_LIST_HEAD(RUNQUEUE(i));
+        spin_lock_init(&CPU_INFO(i)->run_lock);
+        
         CPU_SVT(i) = 0; /* XXX do I really need to do this? */
     }
 
@@ -476,48 +561,7 @@ int bvt_init_scheduler()
     return 0;
 }
 
-static void bvt_sleep(struct domain *d)
-{
-    if ( test_bit(DF_RUNNING, &d->flags) )
-        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
-    else if ( __task_on_runqueue(RUNLIST(d)) )
-        __del_from_runqueue(RUNLIST(d));
-}
 
-void bvt_wake(struct domain *d)
-{
-    struct bvt_dom_info *inf = BVT_INFO(d);
-    struct domain       *curr;
-    s_time_t             now, min_time;
-    int                  cpu = d->processor;
-
-    /* If on the runqueue already then someone has done the wakeup work. */
-    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
-        return;
-
-    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(d->processor));
-
-    now = NOW();
-
-    /* Set the BVT parameters. */
-    if ( inf->avt < CPU_SVT(cpu) )
-        inf->avt = CPU_SVT(cpu);
-
-    /* Deal with warping here. */
-    inf->warpback  = 1;
-    inf->warped    = now;
-    __calc_evt(inf);
-
-    curr = schedule_data[cpu].curr;
-
-    /* Currently-running domain should run at least for ctx_allow. */
-    min_time = curr->lastschd + curr->min_slice;
-    
-    if ( is_idle_task(curr) || (min_time <= now) )
-        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
-    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
-        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
-}
 
 struct scheduler sched_bvt_def = {
     .name     = "Borrowed Virtual Time",
index a8a60bafd1330f92dc99d07dd06c7d9e4e71c024..8659ab1daea502e4c2e62d3d0760d06e99e64794 100644 (file)
@@ -52,6 +52,7 @@ struct fbvt_dom_info
 
 struct fbvt_cpu_info
 {
+    spinlock_t          run_lock;  /* protects runqueue */
     struct list_head    runqueue;  /* runqueue for this CPU */
     unsigned long       svt;       /* XXX check this is unsigned long! */
     u32                 vtb;       /* virtual time bonus */
@@ -160,15 +161,122 @@ int fbvt_init_idle_task(struct domain *p)
     if(fbvt_alloc_task(p) < 0) return -1;
 
     fbvt_add_task(p);
-    spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);
+    spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);
     set_bit(DF_RUNNING, &p->flags);
     if ( !__task_on_runqueue(RUNLIST(p)) )
     __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor));
-    spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
+    spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
 
     return 0;
 }
                                         
+static void fbvt_wake(struct domain *d)
+{
+    unsigned long        flags;
+    struct fbvt_dom_info *inf = FBVT_INFO(d);
+    struct domain        *curr;
+    s_time_t             now, min_time;
+    int                  cpu = d->processor;
+    s32                  io_warp;
+
+    /* The runqueue accesses must be protected */
+    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
+    
+    /* If on the runqueue already then someone has done the wakeup work. */
+    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
+    {
+        spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags); 
+        return;
+    }    
+    
+    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu));
+    now = NOW();
+
+#if 0
+    /*
+     * XXX KAF: This was fbvt_unpause(). Not sure if it's the right thing
+     * to do, in light of the stuff that fbvt_wake_up() does.
+     * e.g., setting 'inf->avt = CPU_SVT(cpu);' would make the later test
+     * 'inf->avt < CPU_SVT(cpu)' redundant!
+     */
+    if ( d->domain == IDLE_DOMAIN_ID )
+    {
+        inf->avt = inf->evt = ~0U;
+    } 
+    else 
+    {
+        /* Set avt to system virtual time. */
+        inf->avt = CPU_SVT(cpu);
+        /* Set some default values here. */
+        LAST_VTB(cpu) = 0;
+        __calc_evt(inf);
+    }
+#endif
+
+    /* Set the BVT parameters. */
+    if ( inf->avt < CPU_SVT(cpu) )
+    {
+        /*
+     * We want IO bound processes to gain dispatch precedence. This is 
+     * especially important for device driver domains. Therefore AVT should 
+     * not be updated to SVT but to a value marginally smaller.
+         * Since frequently sleeping domains have high time_slept
+         * values, the virtual time can be determined as:
+         * SVT - const * TIME_SLEPT
+         */
+        io_warp = (int)(0.5 * inf->time_slept);
+        if ( io_warp > 1000 )
+            io_warp = 1000;
+
+        ASSERT(inf->time_slept + CPU_SVT(cpu) > inf->avt + io_warp);
+        inf->time_slept += CPU_SVT(cpu) - inf->avt - io_warp;
+        inf->avt = CPU_SVT(cpu) - io_warp;
+    }
+
+    /* Deal with warping here. */
+    inf->warpback  = 1;
+    inf->warped    = now;
+    __calc_evt(inf);
+    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
+    
+    /* Access to schedule_data protected by schedule_lock */
+    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+    
+    curr = schedule_data[cpu].curr;
+    /* Currently-running domain should run at least for ctx_allow. */
+    min_time = curr->lastschd + curr->min_slice;
+    
+    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);   
+    
+    if ( is_idle_task(curr) || (min_time <= now) )
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
+        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
+}
+
+
+static void fbvt_sleep(struct domain *d)
+{
+    unsigned long flags;
+
+    
+    if ( test_bit(DF_RUNNING, &d->flags) )
+        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
+    else
+    {
+         /* The runqueue accesses must be protected */
+        spin_lock_irqsave(&CPU_INFO(d->processor)->run_lock, flags);       
+    
+        if ( __task_on_runqueue(RUNLIST(d)) )
+            __del_from_runqueue(RUNLIST(d));
+
+        spin_unlock_irqrestore(&CPU_INFO(d->processor)->run_lock, flags);
+    }
+}
+
 
 /**
  * fbvt_free_task - free FBVT private structures for a task
@@ -232,7 +340,7 @@ int fbvt_adjdom(struct domain *p,
         if ( mcu_adv == 0 )
             return -EINVAL;
         
-        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
+        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
         inf->mcu_advance = mcu_adv;
         inf->warp = warp;
         inf->warpl = warpl;
@@ -243,20 +351,18 @@ int fbvt_adjdom(struct domain *p,
                 p->domain, inf->mcu_advance, inf->warp,
                 inf->warpl, inf->warpu );
 
-        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, 
-                                                                        flags);
+        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
     }
     else if ( cmd->direction == SCHED_INFO_GET )
     {
         struct fbvt_dom_info *inf = FBVT_INFO(p);
 
-        spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);   
+        spin_lock_irqsave(&CPU_INFO(p->processor)->run_lock, flags);   
         params->mcu_adv = inf->mcu_advance;
         params->warp    = inf->warp;
         params->warpl   = inf->warpl;
         params->warpu   = inf->warpu;
-        spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, 
-                                                                        flags);
+        spin_unlock_irqrestore(&CPU_INFO(p->processor)->run_lock, flags);
     }
     
     return 0;
@@ -272,6 +378,7 @@ int fbvt_adjdom(struct domain *p,
  */
 static task_slice_t fbvt_do_schedule(s_time_t now)
 {
+    unsigned long flags;
     struct domain *prev = current, *next = NULL, *next_prime, *p;
     struct list_head   *tmp;
     int                 cpu = prev->processor;
@@ -288,6 +395,10 @@ static task_slice_t fbvt_do_schedule(s_time_t now)
 
     ASSERT(prev->sched_priv != NULL);
     ASSERT(prev_inf != NULL);
+    
+    spin_lock_irqsave(&CPU_INFO(cpu)->run_lock, flags);
+
+    ASSERT(__task_on_runqueue(RUNLIST(prev)));
 
     if ( likely(!is_idle_task(prev)) ) 
     {
@@ -365,6 +476,8 @@ static task_slice_t fbvt_do_schedule(s_time_t now)
             min_avt = p_inf->avt;
     }
 
+    spin_unlock_irqrestore(&CPU_INFO(cpu)->run_lock, flags);
+
     /* Extract the domain pointers from the dom infos */
     next        = next_inf->domain;
     next_prime  = next_prime_inf->domain;
@@ -475,7 +588,7 @@ static void fbvt_dump_cpu_state(int i)
     struct fbvt_dom_info *d_inf;
     struct domain *d;
 
-    spin_lock_irqsave(&schedule_data[i].schedule_lock, flags);
+    spin_lock_irqsave(&CPU_INFO(i)->run_lock, flags);
     printk("svt=0x%08lX ", CPU_SVT(i));
 
     queue = RUNQUEUE(i);
@@ -494,7 +607,7 @@ static void fbvt_dump_cpu_state(int i)
             (unsigned long)list, (unsigned long)list->next,
             (unsigned long)list->prev);
     }
-    spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags);
+    spin_unlock_irqrestore(&CPU_INFO(i)->run_lock, flags);        
 }
 
 
@@ -519,13 +632,16 @@ int fbvt_init_scheduler()
     for ( i = 0; i < NR_CPUS; i++ )
     {
         schedule_data[i].sched_priv = xmalloc(sizeof(struct fbvt_cpu_info));
-        INIT_LIST_HEAD(RUNQUEUE(i));
+        
         if ( schedule_data[i].sched_priv == NULL )
         {
             printk("Failed to allocate FBVT scheduler per-CPU memory!\n");
             return -1;
         }
 
+        INIT_LIST_HEAD(RUNQUEUE(i));
+        spin_lock_init(&CPU_INFO(i)->run_lock);
         CPU_SVT(i) = 0; /* XXX do I really need to do this? */
     }
 
@@ -541,86 +657,7 @@ int fbvt_init_scheduler()
 
     return 0;
 }
-
-static void fbvt_sleep(struct domain *d)
-{
-    if ( test_bit(DF_RUNNING, &d->flags) )
-        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
-    else if ( __task_on_runqueue(RUNLIST(d)) )
-        __del_from_runqueue(RUNLIST(d));
-}
-
-static void fbvt_wake(struct domain *d)
-{
-    struct fbvt_dom_info *inf = FBVT_INFO(d);
-    struct domain        *curr;
-    s_time_t              now, min_time;
-    int                   cpu = d->processor;
-    s32                   io_warp;
-
-    /* If on the runqueue already then someone has done the wakeup work. */
-    if ( unlikely(__task_on_runqueue(RUNLIST(d))) )
-        return;
-    __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu));
  
-    now = NOW();
-
-#if 0
-    /*
-     * XXX KAF: This was fbvt_unpause(). Not sure if it's the right thing
-     * to do, in light of the stuff that fbvt_wake_up() does.
-     * e.g., setting 'inf->avt = CPU_SVT(cpu);' would make the later test
-     * 'inf->avt < CPU_SVT(cpu)' redundant!
-     */
-    if ( d->domain == IDLE_DOMAIN_ID )
-    {
-        inf->avt = inf->evt = ~0U;
-    } 
-    else 
-    {
-        /* Set avt to system virtual time. */
-        inf->avt = CPU_SVT(cpu);
-        /* Set some default values here. */
-        LAST_VTB(cpu) = 0;
-        __calc_evt(inf);
-    }
-#endif
-
-    /* Set the BVT parameters. */
-    if ( inf->avt < CPU_SVT(cpu) )
-    {
-        /*
-         * We want IO bound processes to gain dispatch precedence. It is 
-         * especially for device driver domains. Therefore AVT 
-         * not be updated to SVT but to a value marginally smaller.
-         * Since frequently sleeping domains have high time_slept
-         * values, the virtual time can be determined as:
-         * SVT - const * TIME_SLEPT
-         */
-        io_warp = (int)(0.5 * inf->time_slept);
-        if ( io_warp > 1000 )
-            io_warp = 1000;
-
-        ASSERT(inf->time_slept + CPU_SVT(cpu) > inf->avt + io_warp);
-        inf->time_slept += CPU_SVT(cpu) - inf->avt - io_warp;
-        inf->avt = CPU_SVT(cpu) - io_warp;
-    }
-
-    /* Deal with warping here. */
-    inf->warpback  = 1;
-    inf->warped    = now;
-    __calc_evt(inf);
-
-    curr = schedule_data[cpu].curr;
-    /* Currently-running domain should run at least for ctx_allow. */
-    min_time = curr->lastschd + curr->min_slice;
-    
-    if ( is_idle_task(curr) || (min_time <= now) )
-        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
-    else if ( schedule_data[cpu].s_timer.expires > (min_time + TIME_SLOP) )
-        mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
-} 
 
 struct scheduler sched_fbvt_def = {
     .name     = "Fair Borrowed Virtual Time",
index 49daaa05d2f9007de46472698f6f7098ff036fc1..b5ab6000e5430ae4777d9775a57d745f19eb4ac3 100644 (file)
@@ -23,6 +23,8 @@ struct rrobin_dom_info
     struct domain    *domain;
 };
 
+static spinlock_t run_locks[NR_CPUS];
+
 #define RR_INFO(d)      ((struct rrobin_dom_info *)d->sched_priv)
 #define RUNLIST(d)      (struct list_head *)&(RR_INFO(d)->run_list)
 #define RUNQUEUE(cpu)   RUNLIST(schedule_data[cpu].idle)
@@ -49,7 +51,10 @@ static int rr_init_scheduler()
     int i;
 
     for ( i = 0; i < NR_CPUS; i++ )
+    {
         INIT_LIST_HEAD(RUNQUEUE(i));
+        spin_lock_init(&run_locks[i]);
+    }
    
     dom_info_cache = xmem_cache_create("FBVT dom info", 
                                         sizeof(struct rrobin_dom_info), 
@@ -95,11 +100,11 @@ static int rr_init_idle_task(struct domain *p)
     if(rr_alloc_task(p) < 0) return -1;
     rr_add_task(p);
 
-    spin_lock_irqsave(&schedule_data[p->processor].schedule_lock, flags);
+    spin_lock_irqsave(&run_locks[p->processor], flags);
     set_bit(DF_RUNNING, &p->flags);
     if ( !__task_on_runqueue(RUNLIST(p)) )
          __add_to_runqueue_head(RUNLIST(p), RUNQUEUE(p->processor));
-    spin_unlock_irqrestore(&schedule_data[p->processor].schedule_lock, flags);
+    spin_unlock_irqrestore(&run_locks[p->processor], flags);
     return 0;
 }
 
@@ -107,11 +112,14 @@ static int rr_init_idle_task(struct domain *p)
 /* Main scheduling function */
 static task_slice_t rr_do_schedule(s_time_t now)
 {
+    unsigned long flags;
     struct domain *prev = current;
     int cpu = current->processor;
     
     task_slice_t ret;
-
+    
+    spin_lock_irqsave(&run_locks[cpu], flags);
+    
     if(!is_idle_task(prev))
     {
         __del_from_runqueue(RUNLIST(prev));
@@ -120,6 +128,8 @@ static task_slice_t rr_do_schedule(s_time_t now)
             __add_to_runqueue_tail(RUNLIST(prev), RUNQUEUE(cpu));
     }
     
+    spin_unlock_irqrestore(&run_locks[cpu], flags);
+    
     ret.task = list_entry(  RUNQUEUE(cpu).next->next, 
                             struct rrobin_dom_info, 
                             run_list)->domain;
@@ -149,27 +159,44 @@ static void rr_dump_settings()
 
 static void rr_sleep(struct domain *d)
 {
+    unsigned long flags;
+
     if ( test_bit(DF_RUNNING, &d->flags) )
         cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
-    else if ( __task_on_runqueue(RUNLIST(d)) )
-        __del_from_runqueue(RUNLIST(d));
+    else
+    {
+        spin_lock_irqsave(&run_locks[d->processor], flags);
+        if ( __task_on_runqueue(RUNLIST(d)) )
+            __del_from_runqueue(RUNLIST(d));
+        spin_unlock_irqrestore(&run_locks[d->processor], flags);
+    }
 }
 
 void rr_wake(struct domain *d)
 {
+    unsigned long       flags;
     struct domain       *curr;
-    s_time_t             now, min_time;
-    int                  cpu = d->processor;
+    s_time_t            now, min_time;
+    int                 cpu = d->processor;
 
+    spin_lock_irqsave(&run_locks[cpu], flags);
+    
     /* If on the runqueue already then someone has done the wakeup work. */
     if ( unlikely(__task_on_runqueue(RUNLIST(d))))
+    {
+        spin_unlock_irqrestore(&run_locks[cpu], flags);
         return;
+    }
 
     __add_to_runqueue_head(RUNLIST(d), RUNQUEUE(cpu));
+    spin_unlock_irqrestore(&run_locks[cpu], flags);
+
     now = NOW();
 
+    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
     curr = schedule_data[cpu].curr;
-
+    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
     /* Currently-running domain should run at least for ctx_allow. */
     min_time = curr->lastschd + curr->min_slice;
     
@@ -194,7 +221,7 @@ static void rr_dump_cpu_state(int i)
     int loop = 0;
     struct rrobin_dom_info *d_inf;
 
-    spin_lock_irqsave(&schedule_data[i].schedule_lock, flags);
+    spin_lock_irqsave(&run_locks[i], flags);
 
     queue = RUNQUEUE(i);
     printk("QUEUE rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
@@ -210,7 +237,7 @@ static void rr_dump_cpu_state(int i)
         d_inf = list_entry(list, struct rrobin_dom_info, run_list);
         rr_dump_domain(d_inf->domain);
     }
-    spin_unlock_irqrestore(&schedule_data[i].schedule_lock, flags);
+    spin_unlock_irqrestore(&run_locks[i], flags);
 }
 
 
index 4a8a1b061203639a9bd73c971dfd73e4ef1b83c7..7b12f8f5ab7aadf3bdbc9a0e2eb30fc97d34b3fc 100644 (file)
@@ -164,13 +164,13 @@ void init_idle_task(void)
 void domain_sleep(struct domain *d)
 {
     unsigned long flags;
-    int           cpu = d->processor;
 
-    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+    /* sleep and wake protected by domain's state_lock */
+    spin_lock_irqsave(&d->state_lock, flags);
     if ( likely(!domain_runnable(d)) )
         SCHED_OP(sleep, d);
-    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
-
+    spin_unlock_irqrestore(&d->state_lock, flags);
     /* Synchronous. */
     while ( test_bit(DF_RUNNING, &d->flags) && !domain_runnable(d) )
     {
@@ -182,8 +182,9 @@ void domain_sleep(struct domain *d)
 void domain_wake(struct domain *d)
 {
     unsigned long       flags;
-    int                 cpu = d->processor;
-    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+
+    spin_lock_irqsave(&d->state_lock, flags);
+    
     if ( likely(domain_runnable(d)) )
     {
         TRACE_2D(TRC_SCHED_WAKE, d->domain, d);
@@ -192,7 +193,8 @@ void domain_wake(struct domain *d)
         d->wokenup = NOW();
 #endif
     }
-    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
+
+    spin_unlock_irqrestore(&d->state_lock, flags);
 }
 
 /* Block the currently-executing domain until a pertinent event occurs. */
@@ -323,9 +325,9 @@ void __enter_scheduler(void)
     s32                 r_time;     /* time for new dom to run */
 
     perfc_incrc(sched_run);
-
+    
     spin_lock_irq(&schedule_data[cpu].schedule_lock);
-
     now = NOW();
 
     rem_ac_timer(&schedule_data[cpu].s_timer);
@@ -349,9 +351,9 @@ void __enter_scheduler(void)
 
     r_time = next_slice.time;
     next = next_slice.task;
-
+    
     schedule_data[cpu].curr = next;
-
+    
     next->lastschd = now;
 
     /* reprogramm the timer */
index 7838d1feffe1cdbedb35a353aa0ec04e678f87dc..7f67e9ee48e09775d31a8f0480760a4fa27a3029 100644 (file)
@@ -101,6 +101,7 @@ struct domain
 
     /* Scheduling. */
     int              shutdown_code; /* code value from OS (if DF_SHUTDOWN). */
+    spinlock_t       state_lock;    /* wake/sleep lock                      */
     s_time_t         lastschd;      /* time this domain was last scheduled */
     s_time_t         lastdeschd;    /* time this domain was last descheduled */
     s_time_t         cpu_time;      /* total CPU time received till now */